{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting started with OpenAssistant OASST1 data\n",
    "\n",
    "- https://huggingface.co/datasets/OpenAssistant/oasst1"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/master/notebooks/openassistant-oasst1/getting-started.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Running in Colab? Uncomment the line below and run this cell to install the dependencies.\n",
    "# %pip install datasets pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\andre\\Documents\\repos\\Open-Assistant\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from datasets import load_dataset\n",
    "\n",
    "# set some pandas options to make the output more readable\n",
    "pd.set_option(\"display.max_rows\", 500)\n",
    "pd.set_option(\"display.max_columns\", 500)\n",
    "pd.set_option(\"display.width\", 1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Found cached dataset parquet (C:/Users/andre/.cache/huggingface/datasets/OpenAssistant___parquet/OpenAssistant--oasst1-2e4c5fea546c88d6/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n",
      "100%|██████████| 2/2 [00:00<00:00, 15.88it/s]\n"
     ]
    }
   ],
   "source": [
    "# Load the OASST1 dataset via the Hugging Face `datasets` library\n",
    "ds = load_dataset(path=\"OpenAssistant/oasst1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull out the train split and convert it to a pandas DataFrame\n",
    "train_split = ds[\"train\"]\n",
    "df = train_split.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 84437 entries, 0 to 84436\n",
      "Data columns (total 18 columns):\n",
      " #   Column           Non-Null Count  Dtype  \n",
      "---  ------           --------------  -----  \n",
      " 0   message_id       84437 non-null  object \n",
      " 1   parent_id        74591 non-null  object \n",
      " 2   user_id          84437 non-null  object \n",
      " 3   created_date     84437 non-null  object \n",
      " 4   text             84437 non-null  object \n",
      " 5   role             84437 non-null  object \n",
      " 6   lang             84437 non-null  object \n",
      " 7   review_count     84437 non-null  int32  \n",
      " 8   review_result    83732 non-null  object \n",
      " 9   deleted          84437 non-null  bool   \n",
      " 10  rank             48730 non-null  float64\n",
      " 11  synthetic        84437 non-null  bool   \n",
      " 12  model_name       0 non-null      object \n",
      " 13  detoxify         72297 non-null  object \n",
      " 14  message_tree_id  84437 non-null  object \n",
      " 15  tree_state       84437 non-null  object \n",
      " 16  emojis           71496 non-null  object \n",
      " 17  labels           84199 non-null  object \n",
      "dtypes: bool(2), float64(1), int32(1), object(14)\n",
      "memory usage: 10.1+ MB\n"
     ]
    }
   ],
   "source": [
    "# Summarise the frame: per-column dtype and non-null count, plus memory usage\n",
    "df.info(show_counts=True, memory_usage=True, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{35238: {'message_id': 'cb9c1893-2eda-42ee-b0a7-adeb0e142867',\n",
       "  'parent_id': 'd24480be-94bd-4dc8-b7df-5eb973ed786f',\n",
       "  'user_id': '50f76166-c558-47be-80b1-5e2b2df6ff09',\n",
       "  'created_date': '2023-02-07T11:53:26.527725+00:00',\n",
       "  'text': 'Good job, are there other riddles like this that you know?',\n",
       "  'role': 'prompter',\n",
       "  'lang': 'en',\n",
       "  'review_count': 3,\n",
       "  'review_result': True,\n",
       "  'deleted': False,\n",
       "  'rank': nan,\n",
       "  'synthetic': False,\n",
       "  'model_name': None,\n",
       "  'detoxify': {'toxicity': 0.021441340446472168,\n",
       "   'severe_toxicity': 3.3909338526427746e-05,\n",
       "   'obscene': 0.0014250462409108877,\n",
       "   'identity_attack': 0.0003495119162835181,\n",
       "   'insult': 0.012425446882843971,\n",
       "   'threat': 5.629667793982662e-05,\n",
       "   'sexual_explicit': 7.260362326633185e-05},\n",
       "  'message_tree_id': '63f26a43-8b7d-4bb2-ad36-4309034c84cd',\n",
       "  'tree_state': 'ready_for_export',\n",
       "  'emojis': {'name': array(['+1', '_skip_reply'], dtype=object),\n",
       "   'count': array([2, 1])},\n",
       "  'labels': {'name': array(['spam', 'lang_mismatch', 'pii', 'not_appropriate', 'hate_speech',\n",
       "          'sexual_content', 'quality', 'toxicity', 'humor', 'creativity',\n",
       "          'violence'], dtype=object),\n",
       "   'value': array([0.        , 0.        , 0.        , 0.        , 0.        ,\n",
       "          0.        , 0.66666667, 0.375     , 0.75      , 0.25      ,\n",
       "          0.125     ]),\n",
       "   'count': array([3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2])}}}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show one random row as a nested dict ({row index: {column: value}}) so every field is readable\n",
    "df.sample(n=1).T.to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "e63d94b6-6f92-410f-b9d8-5cbd9367f2e3\n"
     ]
    }
   ],
   "source": [
    "# Pick the message tree id of one randomly sampled message\n",
    "message_tree_id = df[\"message_tree_id\"].sample(n=1).iloc[0]\n",
    "print(message_tree_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>message_id</th>\n",
       "      <th>parent_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>created_date</th>\n",
       "      <th>text</th>\n",
       "      <th>role</th>\n",
       "      <th>lang</th>\n",
       "      <th>review_count</th>\n",
       "      <th>review_result</th>\n",
       "      <th>deleted</th>\n",
       "      <th>rank</th>\n",
       "      <th>synthetic</th>\n",
       "      <th>model_name</th>\n",
       "      <th>detoxify</th>\n",
       "      <th>message_tree_id</th>\n",
       "      <th>tree_state</th>\n",
       "      <th>emojis</th>\n",
       "      <th>labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>71993</th>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>None</td>\n",
       "      <td>a63181b7-107c-4fcc-913c-6871e68a9ffe</td>\n",
       "      <td>2023-02-05T22:58:35.549190+00:00</td>\n",
       "      <td>can you tell me a few good strategies to win a...</td>\n",
       "      <td>prompter</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.0033605224452912807, 'severe_to...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['+1', '_skip_reply', '_skip_ranking'...</td>\n",
       "      <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71996</th>\n",
       "      <td>8a58e588-f3c4-4190-a1e4-538c86abb17b</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>df6e7d47-7c0e-4958-8bc4-4558af0ece38</td>\n",
       "      <td>2023-02-06T03:37:06.246378+00:00</td>\n",
       "      <td>Sure, here are a few strategies that you can u...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>1.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.000288925803033635, 'severe_tox...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['+1'], 'count': [3]}</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71994</th>\n",
       "      <td>4c27698c-01c7-42db-ac25-a2d6af5da4f4</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>7de84fb1-4cf5-4142-989d-f490fc526bfd</td>\n",
       "      <td>2023-02-06T03:56:59.785890+00:00</td>\n",
       "      <td>Certainly! Here are a few effective tactics fo...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.00032153952633962035, 'severe_t...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['_skip_reply'], 'count': [1]}</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72000</th>\n",
       "      <td>d4154bbe-d137-4f3c-9382-8a3b306402f8</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>f14aa565-a0ed-4d18-b949-0daccb549f03</td>\n",
       "      <td>2023-02-06T04:21:18.793582+00:00</td>\n",
       "      <td>Settlers of Catan is a game of strategy, teamw...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>2.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.000376795360352844, 'severe_tox...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['+1', '-1'], 'count': [1, 1]}</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71997</th>\n",
       "      <td>50416bbf-bf73-4022-833c-10c7ca062ad6</td>\n",
       "      <td>8a58e588-f3c4-4190-a1e4-538c86abb17b</td>\n",
       "      <td>0b53f16f-910a-453b-8101-dfe64c6dd30c</td>\n",
       "      <td>2023-02-06T06:23:21.276590+00:00</td>\n",
       "      <td>Do the Settlers of Catan expansions require di...</td>\n",
       "      <td>prompter</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.0004958086065016687, 'severe_to...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['_skip_reply', '_skip_ranking'], 'co...</td>\n",
       "      <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72001</th>\n",
       "      <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
       "      <td>d4154bbe-d137-4f3c-9382-8a3b306402f8</td>\n",
       "      <td>6a4224c2-6b7f-4981-85d3-5b26a75a5324</td>\n",
       "      <td>2023-02-06T08:25:07.775154+00:00</td>\n",
       "      <td>What are good long term strategies for getting...</td>\n",
       "      <td>prompter</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.00035797784221358597, 'severe_t...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['+1', '_skip_reply', '_skip_ranking'...</td>\n",
       "      <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71999</th>\n",
       "      <td>ed3fe459-77eb-4dd3-9672-46e5dcddbd2e</td>\n",
       "      <td>50416bbf-bf73-4022-833c-10c7ca062ad6</td>\n",
       "      <td>224c3d09-c385-46cd-93e2-5f3682ac2624</td>\n",
       "      <td>2023-02-06T08:43:38.464529+00:00</td>\n",
       "      <td>The expansions in \"Settlers of Catan\" can have...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>1.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.00036860740510746837, 'severe_t...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['+1'], 'count': [1]}</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71998</th>\n",
       "      <td>0936e043-4cbb-4441-adaa-e5443c4cd99d</td>\n",
       "      <td>50416bbf-bf73-4022-833c-10c7ca062ad6</td>\n",
       "      <td>03d8200e-9023-423c-a3f0-47828a5b4189</td>\n",
       "      <td>2023-02-06T08:58:01.473538+00:00</td>\n",
       "      <td>Yes, the expansions for Settlers of Catan can ...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.00033097557025030255, 'severe_t...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['_skip_labeling'], 'count': [1]}</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71995</th>\n",
       "      <td>43431488-6f0f-481e-9001-8407ab25b6cf</td>\n",
       "      <td>4c27698c-01c7-42db-ac25-a2d6af5da4f4</td>\n",
       "      <td>ecaa4818-48da-45a8-bab5-ae6de585ad8a</td>\n",
       "      <td>2023-02-06T10:19:04.483254+00:00</td>\n",
       "      <td>Is there an online community? Can i play it on...</td>\n",
       "      <td>prompter</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.000970872410107404, 'severe_tox...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>None</td>\n",
       "      <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72003</th>\n",
       "      <td>c0e39213-4fd0-4612-ab34-f5bc2b74300e</td>\n",
       "      <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
       "      <td>8a9d78b7-5b39-4033-8829-1b5486fdba76</td>\n",
       "      <td>2023-02-06T11:53:46.248417+00:00</td>\n",
       "      <td>A good long term strategy for getting the long...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>1.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.0003667851269710809, 'severe_to...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['+1', '_skip_labeling'], 'count': [3...</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72004</th>\n",
       "      <td>f11e5cc5-2c08-4450-b619-89b8c8ec2862</td>\n",
       "      <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
       "      <td>e14803c4-5c0d-49f3-808d-f1b6dc80e8ed</td>\n",
       "      <td>2023-02-06T13:18:54.555828+00:00</td>\n",
       "      <td>In the board game \"Catan,\" \"the longest road\" ...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>2.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.0003078073204960674, 'severe_to...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>{'name': ['_skip_labeling'], 'count': [1]}</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72002</th>\n",
       "      <td>7a60b98a-e15b-44b6-a049-63b96ce3cf43</td>\n",
       "      <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
       "      <td>f53ff129-eb80-4bfb-9468-ff6b6630d356</td>\n",
       "      <td>2023-02-06T13:28:06.829895+00:00</td>\n",
       "      <td>Getting the Longest Road is a key victory cond...</td>\n",
       "      <td>assistant</td>\n",
       "      <td>en</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>None</td>\n",
       "      <td>{'toxicity': 0.0003417894768062979, 'severe_to...</td>\n",
       "      <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
       "      <td>ready_for_export</td>\n",
       "      <td>None</td>\n",
       "      <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 message_id                             parent_id                               user_id                      created_date                                               text       role lang  review_count review_result  deleted  rank  synthetic model_name                                           detoxify                       message_tree_id        tree_state                                             emojis                                             labels\n",
       "71993  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3                                  None  a63181b7-107c-4fcc-913c-6871e68a9ffe  2023-02-05T22:58:35.549190+00:00  can you tell me a few good strategies to win a...   prompter   en             3          True    False   NaN      False       None  {'toxicity': 0.0033605224452912807, 'severe_to...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export  {'name': ['+1', '_skip_reply', '_skip_ranking'...  {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
       "71996  8a58e588-f3c4-4190-a1e4-538c86abb17b  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  df6e7d47-7c0e-4958-8bc4-4558af0ece38  2023-02-06T03:37:06.246378+00:00  Sure, here are a few strategies that you can u...  assistant   en             3          True    False   1.0      False       None  {'toxicity': 0.000288925803033635, 'severe_tox...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export                     {'name': ['+1'], 'count': [3]}  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "71994  4c27698c-01c7-42db-ac25-a2d6af5da4f4  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  7de84fb1-4cf5-4142-989d-f490fc526bfd  2023-02-06T03:56:59.785890+00:00  Certainly! Here are a few effective tactics fo...  assistant   en             3          True    False   0.0      False       None  {'toxicity': 0.00032153952633962035, 'severe_t...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export            {'name': ['_skip_reply'], 'count': [1]}  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "72000  d4154bbe-d137-4f3c-9382-8a3b306402f8  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  f14aa565-a0ed-4d18-b949-0daccb549f03  2023-02-06T04:21:18.793582+00:00  Settlers of Catan is a game of strategy, teamw...  assistant   en             3          True    False   2.0      False       None  {'toxicity': 0.000376795360352844, 'severe_tox...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export            {'name': ['+1', '-1'], 'count': [1, 1]}  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "71997  50416bbf-bf73-4022-833c-10c7ca062ad6  8a58e588-f3c4-4190-a1e4-538c86abb17b  0b53f16f-910a-453b-8101-dfe64c6dd30c  2023-02-06T06:23:21.276590+00:00  Do the Settlers of Catan expansions require di...   prompter   en             3          True    False   NaN      False       None  {'toxicity': 0.0004958086065016687, 'severe_to...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export  {'name': ['_skip_reply', '_skip_ranking'], 'co...  {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
       "72001  81ba6358-9ac8-45b2-a202-723b605b8c95  d4154bbe-d137-4f3c-9382-8a3b306402f8  6a4224c2-6b7f-4981-85d3-5b26a75a5324  2023-02-06T08:25:07.775154+00:00  What are good long term strategies for getting...   prompter   en             3          True    False   NaN      False       None  {'toxicity': 0.00035797784221358597, 'severe_t...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export  {'name': ['+1', '_skip_reply', '_skip_ranking'...  {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
       "71999  ed3fe459-77eb-4dd3-9672-46e5dcddbd2e  50416bbf-bf73-4022-833c-10c7ca062ad6  224c3d09-c385-46cd-93e2-5f3682ac2624  2023-02-06T08:43:38.464529+00:00  The expansions in \"Settlers of Catan\" can have...  assistant   en             3          True    False   1.0      False       None  {'toxicity': 0.00036860740510746837, 'severe_t...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export                     {'name': ['+1'], 'count': [1]}  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "71998  0936e043-4cbb-4441-adaa-e5443c4cd99d  50416bbf-bf73-4022-833c-10c7ca062ad6  03d8200e-9023-423c-a3f0-47828a5b4189  2023-02-06T08:58:01.473538+00:00  Yes, the expansions for Settlers of Catan can ...  assistant   en             3          True    False   0.0      False       None  {'toxicity': 0.00033097557025030255, 'severe_t...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export         {'name': ['_skip_labeling'], 'count': [1]}  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "71995  43431488-6f0f-481e-9001-8407ab25b6cf  4c27698c-01c7-42db-ac25-a2d6af5da4f4  ecaa4818-48da-45a8-bab5-ae6de585ad8a  2023-02-06T10:19:04.483254+00:00  Is there an online community? Can i play it on...   prompter   en             3          True    False   NaN      False       None  {'toxicity': 0.000970872410107404, 'severe_tox...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export                                               None  {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
       "72003  c0e39213-4fd0-4612-ab34-f5bc2b74300e  81ba6358-9ac8-45b2-a202-723b605b8c95  8a9d78b7-5b39-4033-8829-1b5486fdba76  2023-02-06T11:53:46.248417+00:00  A good long term strategy for getting the long...  assistant   en             3          True    False   1.0      False       None  {'toxicity': 0.0003667851269710809, 'severe_to...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export  {'name': ['+1', '_skip_labeling'], 'count': [3...  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "72004  f11e5cc5-2c08-4450-b619-89b8c8ec2862  81ba6358-9ac8-45b2-a202-723b605b8c95  e14803c4-5c0d-49f3-808d-f1b6dc80e8ed  2023-02-06T13:18:54.555828+00:00  In the board game \"Catan,\" \"the longest road\" ...  assistant   en             3          True    False   2.0      False       None  {'toxicity': 0.0003078073204960674, 'severe_to...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export         {'name': ['_skip_labeling'], 'count': [1]}  {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
       "72002  7a60b98a-e15b-44b6-a049-63b96ce3cf43  81ba6358-9ac8-45b2-a202-723b605b8c95  f53ff129-eb80-4bfb-9468-ff6b6630d356  2023-02-06T13:28:06.829895+00:00  Getting the Longest Road is a key victory cond...  assistant   en             3          True    False   0.0      False       None  {'toxicity': 0.0003417894768062979, 'severe_to...  e63d94b6-6f92-410f-b9d8-5cbd9367f2e3  ready_for_export                                               None  {'name': ['spam', 'fails_task', 'lang_mismatch..."
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every message belonging to the selected tree, sorted by created_date.\n",
    "# A boolean mask is used instead of interpolating the id into a query string,\n",
    "# so the lookup keeps working even if the value contains quote characters.\n",
    "df[df[\"message_tree_id\"] == message_tree_id].sort_values(\"created_date\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
